The scRNA-seq dataset was processed and analyzed with Seurat version 2.3.4. Here, epithelial cell-derived fraction was identified from all dataset.
# Load libraries
library(Seurat) # version 2.3.4
library(useful) # Corner function
library(ggplot2)
library(dplyr)
library(knitr)
library(devtools)
library(cowplot)
# Load original functions
source("./Src/Visualization_for_object_of_Seurat_v2.r")# Load dataset and sample information
originaldata_genesymbol_sum <- read.table("./dataset/GSE147372_countsf_analysis_tpm_genesymbol_sum.txt",
sep = "\t", header = TRUE, row.names = "gene_symbol")
originaldata_genesymbol_sum <- originaldata_genesymbol_sum[,-1]
sample_info_new <- read.table("./dataset/sample_info_all.txt", sep = "\t", header = TRUE, row.names = 1)# Setup Seurat object
EWF <- CreateSeuratObject(raw.data = originaldata_genesymbol_sum,
min.cells = 3, min.genes = 200, project = "RamDA-seq_EWF")
# Add labels (mitochondrial genes)
mito.genes <- grep(pattern = "^mt-", x = rownames(x = EWF@data), value = TRUE)
percent.mito <- Matrix::colSums(EWF@raw.data[mito.genes, ])/Matrix::colSums(EWF@raw.data)
EWF <- AddMetaData(EWF, percent.mito, "percent.mito")
# Add labels (stages)
EWF@meta.data$stage <- factor(sample_info_new$stage, levels = c("E11.5", "E12.0", "E13.0", "E13.5", "E14.0", "E15.0", "E17.0"))
# Add labels (plates)
EWF@meta.data$plate <- factor(sample_info_new$plate_id[],
levels = c("E11_180312003", "E12_160809001", "E12_160809003", "E12_160809004", "E13_150126001",
"E13_160126002", "E13_160126004", "E13_161012002", "E13_161012003", "E14_161019002",
"E14_161019003", "E15_150126001", "E15_161014001", "E15_161014002", "E15_161014003",
"E17_161014001", "E17_161014002", "E17_161014003"))
dim(EWF@raw.data)## [1] 47667 1614
library(easyGgplot2)
p<-ggplot2.violinplot(EWF@meta.data$nGene,addDot=TRUE, dotSize=1.7,dotPosition="jitter",
jitter=0.2, addMean = TRUE, xShowTitle=FALSE, yShowTitle=FALSE, mainTitle="nGene")
p=p+theme_grey()+xlab("")+ylab("")+
theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())+
geom_hline(yintercept = 16000, colour = "red")
p1<-ggplot2.violinplot(EWF@meta.data$percent.mito,addDot=TRUE, dotSize=1.7,dotPosition="jitter",
jitter=0.2, addMean = TRUE, xShowTitle=FALSE, yShowTitle=FALSE, mainTitle="percent.mito")
p1=p1+theme_grey()+xlab("")+ylab("")+
theme(axis.text.x = element_blank(),axis.ticks.x = element_blank())+
geom_hline(yintercept = 0.07, colour = "red")
plot_grid(p, p1)# Seurat: Filtering on Metadata
library(easyGgplot2)
p <- ggplot2.violinplot(EWF@meta.data, xName = "stage", yName = "nGene", addDot = TRUE,
dotSize = 1.7, dotPosition = "jitter", jitter = 0.2, addMean = TRUE,
xShowTitle = TRUE, yShowTitle = FALSE, mainTitle = "nGene")
p <- p + theme_grey() + xlab("") + ylab("") + geom_hline(yintercept = 16000, colour = "red")
p1 <- ggplot2.violinplot(EWF@meta.data, xName = "stage", yName = "percent.mito", addDot = TRUE,
dotSize = 1.7,dotPosition = "jitter", jitter = 0.2, addMean = TRUE,
xShowTitle = TRUE, yShowTitle = FALSE, mainTitle = "percent.mito")
p1 <- p1 + theme_grey() + xlab("") + ylab("") + geom_hline(yintercept = 0.07, colour = "red")
plot_grid(p, p1)EWF <- FindVariableGenes(object = EWF, mean.function = ExpMean, dispersion.function = LogVMR,
x.low.cutoff = 0.0125, x.high.cutoff = 3, y.cutoff = 1)##
## Time Elapsed: 1.51834220091502 mins
EWF <- RunPCA(object = EWF, pc.genes = EWF@var.genes, do.print = F)
EWF <- ProjectPCA(EWF, do.print = F)EWF <- JackStraw(object = EWF, num.replicate = 100, display.progress = FALSE)
JackStrawPlot(object = EWF, PCs = 1:20)## An object of class seurat in project RamDA-seq_EWF
## 47667 genes across 1595 samples.
# Check the cell types
myFeaturePlot(EWF, c("Krt5", "Krt14", "Epcam", "Itga6", # Epithelial cells
"Vim", "Col1a1", "Pdgfra", "Col3a1", # Mesenchymal cells
"Kdr", "Flt4", "Dll4", "Pecam1", # Blood vessel cells
"Krt8", "Krt20", # Merkel cells
"Sox2", "Corin"), # DP cells
nCol = 4, title.size = 25, pch.use = 21, pt.size = 4,
outline.size = 0.1, outline.color = "grey50")df <- data.frame(EWF@dr$tsne@cell.embeddings, ident = EWF@ident)
# extraction of minor population 1
selected.data1 <- subset(df, 3 < tSNE_1 & tSNE_1 < 4 & 23 < tSNE_2 & tSNE_2 < 24)
selected.cells1 <- row.names(selected.data1)
# extraction of minor population 2
selected.data2 <- subset(df, 6 < tSNE_1 & tSNE_1 < 9 & 10 < tSNE_2 & tSNE_2 < 11)
selected.cells2 <- row.names(selected.data2)
# extraction of minor population 3
selected.data3 <- subset(df, 13 < tSNE_1 & ident == "2")
selected.cells3 <- row.names(selected.data3)
plot_grid(
highlightTSNEPlot(EWF, pt.shape = 21, theme.grey = TRUE, highlight.cell = selected.cells1, title = "Group 1"),
highlightTSNEPlot(EWF, pt.shape = 21, theme.grey = TRUE, highlight.cell = selected.cells2, title = "Group 2"),
highlightTSNEPlot(EWF, pt.shape = 21, theme.grey = TRUE, highlight.cell = selected.cells3, title = "Group 3"),
ncol = 3, align = "hv")# Identification of minor populations
# Find differentially expressed genes in minor populations
newcells.markers01 <- FindMarkers(object = EWF, ident.1 = "6", ident.2 = "0", only.pos = TRUE,
min.pct = 0.25, thresh.use = 0.25, test.use = "roc")
newcells.markers02 <- FindMarkers(object = EWF, ident.1 = "7", ident.2 = "0", only.pos = TRUE,
min.pct = 0.25, thresh.use = 0.25, test.use = "roc")
cluster5.markers <- FindMarkers(object = EWF, ident.1 = "5", only.pos = TRUE,
min.pct = 0.25, thresh.use = 0.25, test.use = "roc")# Change the name and order of clusters
current.cluster.ids <- c(0, 1, 2, 3, 4, 5, 6, 7)
new.cluster.ids <- c("Cluster1", "Cluster2", "Cluster1", "Cluster1", "Cluster3", "Cluster4", "Cluster5", "Cluster6")
EWF@ident <- plyr::mapvalues(EWF@ident, from = current.cluster.ids, to = new.cluster.ids)
EWF@ident <- factor(EWF@ident, levels = c("Cluster1", "Cluster2", "Cluster3", "Cluster4", "Cluster5", "Cluster6"))# Data visualization with TSNEPlot
plot_grid(
myTSNEPlot(EWF, do.label = F, no.rect = F, title = "Cluster", pt.shape = 21, theme.grey = TRUE),
myTSNEPlot(EWF, group.by = "stage", do.label = F, no.rect = F, title = "Stage", pt.shape = 21, theme.grey = TRUE,
pt.color = c("#ff4000", "#ff8c00", "#ffe500", "#b2db11", "#1b9850", "#00d3cc", "#0188a8")),
ncol = 2, align = "v")# Data visualization with ViolinPlot
my.data <- FetchData(EWF, c("ident", "nGene", "stage",
c("Cdh1", "Cdh3", "Krt5", "Krt14", "Epcam", "Itga6",
"Vim", "Col1a1", "Pdgfra", "Col3a1", "Fbn1",
"Kdr", "Flt4", "Dll4", "Pecam1", "Tek",
"Sox10", "Lgi4", "Gpr17", "Cdh19", "Plp1",
"Lilrb4a", "Coro1a", "Ccl3", "Lyz2", "Fcer1g",
"Kit", "Dct", "Mitf", "Pmel", "Mcoln3")))
my.data_melt <- reshape2::melt(my.data, id.vars = c("nGene", "stage", "ident"), variable.name = "genes", na.rm = TRUE)
my.data_melt$genes <- factor(my.data_melt$genes,
levels = c("Cdh1", "Cdh3", "Krt5", "Krt14", "Epcam", "Itga6",
"Vim", "Col1a1", "Pdgfra", "Col3a1", "Fbn1",
"Kdr", "Flt4", "Dll4", "Pecam1", "Tek",
"Sox10", "Lgi4", "Gpr17", "Cdh19", "Plp1",
"Lilrb4a", "Coro1a", "Ccl3", "Lyz2", "Fcer1g",
"Kit", "Dct", "Mitf", "Pmel", "Mcoln3"))
p5 <- ggplot(my.data_melt, aes(x = ident, y = value, fill = ident))
p5 <- p5 + geom_violin(scale = "width", adjust = 1, trim = TRUE) + coord_flip()
p5 <- p5 + scale_x_discrete(limits = c("Cluster6", "Cluster5", "Cluster4", "Cluster3", "Cluster2", "Cluster1"))
p5 <- p5 + theme(panel.background = element_rect(fill = "white"),
strip.text.x = element_text(angle = 60, face = "italic", size = 15),
axis.text.y = element_text(angle = 0, hjust = 1, face = "bold", colour = "black", size = 15),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
panel.spacing.x = unit(0, "lines"),
panel.border = element_rect(color = "black", fill=NA, size = 0.5, linetype = 1),
strip.background = element_rect(colour = NA, fill = NA)) + guides(fill = FALSE)
p5 <- p5 + facet_grid(. ~ genes, scales = "free", switch = "y")
p5 <- p5 + xlab("") + ylab("")
p5